import gzip
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
redfin_df = pd.read_csv(r"C:\Users\wilke\Downloads\redfin_metro_market_tracker.tsv000.gz",
compression='gzip',sep = '\t',header = 0,quotechar='"')
oh_bool = []
for i in redfin_df['parent_metro_region']:
bool = 'OH' in i
oh_bool.append(bool)
oh_metrics = redfin_df[oh_bool]
oh_metrics.sort_values(['parent_metro_region','period_begin'])
| period_begin | period_end | period_duration | region_type | region_type_id | table_id | is_seasonally_adjusted | region | city | state | ... | sold_above_list_yoy | price_drops | price_drops_mom | price_drops_yoy | off_market_in_two_weeks | off_market_in_two_weeks_mom | off_market_in_two_weeks_yoy | parent_metro_region | parent_metro_region_metro_code | last_updated | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 13072 | 2012-01-01 | 2012-01-31 | 30 | metro | -2 | 10420 | f | Akron, OH metro area | NaN | NaN | ... | -0.031343 | NaN | NaN | NaN | 0.032086 | -0.017707 | 0.021275 | Akron, OH | 10420 | 2023-06-12 18:59:53 |
| 155928 | 2012-01-01 | 2012-01-31 | 30 | metro | -2 | 10420 | f | Akron, OH metro area | NaN | NaN | ... | -0.125000 | NaN | NaN | NaN | 0.000000 | 0.000000 | 0.000000 | Akron, OH | 10420 | 2023-06-12 18:59:53 |
| 189842 | 2012-01-01 | 2012-01-31 | 30 | metro | -2 | 10420 | f | Akron, OH metro area | NaN | NaN | ... | 0.070000 | NaN | NaN | NaN | 0.000000 | -0.071429 | 0.000000 | Akron, OH | 10420 | 2023-06-12 18:59:53 |
| 246828 | 2012-01-01 | 2012-01-31 | 30 | metro | -2 | 10420 | f | Akron, OH metro area | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Akron, OH | 10420 | 2023-06-12 18:59:53 |
| 330225 | 2012-01-01 | 2012-01-31 | 30 | metro | -2 | 10420 | f | Akron, OH metro area | NaN | NaN | ... | -0.035880 | NaN | NaN | NaN | 0.036364 | -0.014095 | 0.024388 | Akron, OH | 10420 | 2023-06-12 18:59:53 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 213821 | 2023-04-01 | 2023-04-30 | 30 | metro | -2 | 49780 | f | Zanesville, OH metro area | NaN | NaN | ... | -0.189678 | 0.223881 | 0.018752 | 0.118617 | 0.347222 | 0.019353 | 0.049925 | Zanesville, OH | 49780 | 2023-06-12 18:59:53 |
| 78549 | 2023-05-01 | 2023-05-31 | 30 | metro | -2 | 49780 | f | Zanesville, OH metro area | NaN | NaN | ... | -0.086996 | 0.246753 | 0.039857 | 0.045570 | 0.400000 | 0.062162 | 0.034146 | Zanesville, OH | 49780 | 2023-06-12 18:59:53 |
| 154272 | 2023-05-01 | 2023-05-31 | 30 | metro | -2 | 49780 | f | Zanesville, OH metro area | NaN | NaN | ... | 0.000000 | 0.200000 | NaN | NaN | 0.666667 | 0.666667 | 0.666667 | Zanesville, OH | 49780 | 2023-06-12 18:59:53 |
| 312699 | 2023-05-01 | 2023-05-31 | 30 | metro | -2 | 49780 | f | Zanesville, OH metro area | NaN | NaN | ... | -0.074074 | 0.244755 | 0.020875 | 0.032255 | 0.381579 | 0.034357 | 0.027149 | Zanesville, OH | 49780 | 2023-06-12 18:59:53 |
| 317645 | 2023-05-01 | 2023-05-31 | 30 | metro | -2 | 49780 | f | Zanesville, OH metro area | NaN | NaN | ... | -1.000000 | 0.500000 | NaN | NaN | 1.000000 | NaN | 0.000000 | Zanesville, OH | 49780 | 2023-06-12 18:59:53 |
18561 rows × 58 columns
oh_metrics.columns
Index(['period_begin', 'period_end', 'period_duration', 'region_type',
'region_type_id', 'table_id', 'is_seasonally_adjusted', 'region',
'city', 'state', 'state_code', 'property_type', 'property_type_id',
'median_sale_price', 'median_sale_price_mom', 'median_sale_price_yoy',
'median_list_price', 'median_list_price_mom', 'median_list_price_yoy',
'median_ppsf', 'median_ppsf_mom', 'median_ppsf_yoy', 'median_list_ppsf',
'median_list_ppsf_mom', 'median_list_ppsf_yoy', 'homes_sold',
'homes_sold_mom', 'homes_sold_yoy', 'pending_sales',
'pending_sales_mom', 'pending_sales_yoy', 'new_listings',
'new_listings_mom', 'new_listings_yoy', 'inventory', 'inventory_mom',
'inventory_yoy', 'months_of_supply', 'months_of_supply_mom',
'months_of_supply_yoy', 'median_dom', 'median_dom_mom',
'median_dom_yoy', 'avg_sale_to_list', 'avg_sale_to_list_mom',
'avg_sale_to_list_yoy', 'sold_above_list', 'sold_above_list_mom',
'sold_above_list_yoy', 'price_drops', 'price_drops_mom',
'price_drops_yoy', 'off_market_in_two_weeks',
'off_market_in_two_weeks_mom', 'off_market_in_two_weeks_yoy',
'parent_metro_region', 'parent_metro_region_metro_code',
'last_updated'],
dtype='object')
plt.figure(figsize = (20,20))
sns.heatmap(oh_metrics.corr())
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\1925643702.py:2: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. sns.heatmap(oh_metrics.corr())
<Axes: >
metrics_df = oh_metrics[['period_begin',
'property_type',
'parent_metro_region',
'median_sale_price',
'median_sale_price_mom',
'median_sale_price_yoy',
'median_list_price',
'median_list_price_yoy',
'homes_sold',
'homes_sold_mom',
'homes_sold_yoy',
'median_list_ppsf',
'median_list_ppsf_mom',
'median_list_ppsf_yoy',
'inventory',
'inventory_mom',
'inventory_yoy',
'homes_sold',
'homes_sold_mom',
'homes_sold_yoy',
'avg_sale_to_list',
'sold_above_list',
'sold_above_list_mom',
'sold_above_list_yoy',
'off_market_in_two_weeks',
'price_drops',
'pending_sales',
'pending_sales_mom',
'pending_sales_yoy'
]]
ind = 1
plt.figure(figsize=(40,40))
for i in metrics_df.columns:
plt.subplot(len(metrics_df.columns),2,ind)
plt.hist(metrics_df[i])
plt.title(i)
ind += 1
pd.options.display.float_format = '{:.10f}'.format
metrics_df.describe()
| median_sale_price | median_sale_price_mom | median_sale_price_yoy | median_list_price | median_list_price_yoy | homes_sold | homes_sold_mom | homes_sold_yoy | median_list_ppsf | median_list_ppsf_mom | ... | homes_sold_yoy | avg_sale_to_list | sold_above_list | sold_above_list_mom | sold_above_list_yoy | off_market_in_two_weeks | price_drops | pending_sales | pending_sales_mom | pending_sales_yoy | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 18561.0000000000 | 16939.0000000000 | 16795.0000000000 | 17344.0000000000 | 15853.0000000000 | 18561.0000000000 | 16939.0000000000 | 16795.0000000000 | 17261.0000000000 | 15932.0000000000 | ... | 16795.0000000000 | 18513.0000000000 | 18561.0000000000 | 16939.0000000000 | 16795.0000000000 | 16963.0000000000 | 7169.0000000000 | 16963.0000000000 | 15409.0000000000 | 15190.0000000000 |
| mean | 123316.5856365498 | 2.6377743553 | 0.8916561840 | 130962.6048489391 | 0.1396328997 | 174.1885674263 | 0.1326050315 | 0.2479267285 | 85.1312645735 | 0.0635229108 | ... | 0.2479267285 | 0.9505730064 | 0.1698789102 | 0.0013200587 | 0.0190135428 | 0.1666603519 | 0.1961144420 | 146.4138418912 | 0.1445931898 | 0.3313157016 |
| std | 55459.4255516965 | 320.8536340744 | 81.2828784318 | 55850.5597926305 | 0.9093015223 | 489.2566480623 | 0.8071128567 | 1.1948822229 | 34.8803575591 | 1.1781774974 | ... | 1.1948822229 | 0.0543684326 | 0.1863047256 | 0.1885512968 | 0.1962365221 | 0.2075858628 | 0.1235232663 | 415.3071894373 | 0.7492046057 | 1.3550770078 |
| min | 1.0000000000 | -0.9999722222 | -0.9999560440 | 1200.0000000000 | -0.9499893594 | 1.0000000000 | -0.9230769231 | -0.9600000000 | 0.1944273436 | -0.9895371339 | ... | -0.9600000000 | 0.5003335557 | 0.0000000000 | -1.0000000000 | -1.0000000000 | 0.0000000000 | 0.0000725268 | 1.0000000000 | -0.9285714286 | -0.9230769231 |
| 25% | 88000.0000000000 | -0.1115323163 | -0.0582759969 | 94900.0000000000 | -0.0563340006 | 4.0000000000 | -0.1875000000 | -0.1304347826 | 64.6925566343 | -0.0730573835 | ... | -0.1304347826 | 0.9337706239 | 0.0000000000 | -0.0430191470 | -0.0283190404 | 0.0000000000 | 0.1282798834 | 4.0000000000 | -0.2000000000 | -0.1176470588 |
| 50% | 120000.0000000000 | 0.0055865922 | 0.0714285714 | 124997.5000000000 | 0.0606428138 | 24.0000000000 | 0.0000000000 | 0.0209059233 | 83.2407407407 | 0.0046513262 | ... | 0.0209059233 | 0.9577922377 | 0.1290322581 | 0.0000000000 | 0.0006609385 | 0.0909090909 | 0.1787164907 | 18.0000000000 | 0.0000000000 | 0.0833333333 |
| 75% | 152000.0000000000 | 0.1392399267 | 0.2307692308 | 158462.5000000000 | 0.1915384615 | 66.0000000000 | 0.2307692308 | 0.2692307692 | 104.1666666667 | 0.0946080366 | ... | 0.2692307692 | 0.9781873140 | 0.2444444444 | 0.0479819069 | 0.0727897036 | 0.2551667378 | 0.2352941176 | 52.0000000000 | 0.2727272727 | 0.4000000000 |
| max | 1486000.0000000000 | 41749.0000000000 | 10499.0000000000 | 1100000.0000000000 | 72.7083333333 | 3599.0000000000 | 30.0000000000 | 32.0000000000 | 982.7898550725 | 133.2155634735 | ... | 32.0000000000 | 1.4875884434 | 1.0000000000 | 1.0000000000 | 1.0000000000 | 1.0000000000 | 1.0000000000 | 3591.0000000000 | 17.3333333333 | 52.0000000000 |
8 rows × 26 columns
metrics_df['parent_metro_region'].unique()
array(['Mansfield, OH', 'Springfield, OH', 'Dayton, OH', 'Tiffin, OH',
'Marietta, OH', 'Toledo, OH', 'Ashtabula, OH', 'Celina, OH',
'Norwalk, OH', 'Columbus, OH', 'Zanesville, OH', 'Lima, OH',
'Washington Court House, OH', 'Chillicothe, OH', 'Findlay, OH',
'Mount Vernon, OH', 'Sandusky, OH', 'Akron, OH', 'Urbana, OH',
'Salem, OH', 'Marion, OH', 'Greenville, OH', 'Wilmington, OH',
'Cambridge, OH', 'Ashland, OH', 'Canton, OH', 'Cincinnati, OH',
'New Philadelphia, OH', 'Wooster, OH', 'Wapakoneta, OH',
'Coshocton, OH', 'Bucyrus, OH', 'Sidney, OH', 'Defiance, OH',
'Bellefontaine, OH', 'Fremont, OH', 'Athens, OH', 'Van Wert, OH',
'Jackson, OH', 'Youngstown, OH', 'Portsmouth, OH', 'Cleveland, OH'],
dtype=object)
dayton_market = metrics_df[metrics_df['parent_metro_region'] == 'Dayton, OH']
columbus_market = metrics_df[metrics_df['parent_metro_region'] == 'Columbus, OH']
cincinnati_market = metrics_df[metrics_df['parent_metro_region'] == 'Cincinnati, OH']
cleveland_market = metrics_df[metrics_df['parent_metro_region'] == 'Cleveland, OH']
plt.hist(dayton_market['property_type'])
(array([137., 0., 137., 0., 0., 137., 0., 128., 0., 137.]), array([0. , 0.4, 0.8, 1.2, 1.6, 2. , 2.4, 2.8, 3.2, 3.6, 4. ]), <BarContainer object of 10 artists>)
dayton_townhouse = dayton_market[dayton_market['property_type'] == 'Townhouse'].sort_values('period_begin')
dayton_family_home = dayton_market[dayton_market['property_type'] == 'Single Family Residential'].sort_values('period_begin')
dayton_condo = dayton_market[dayton_market['property_type'] == 'Condo/Co-op'].sort_values('period_begin')
dayton_multi_family = dayton_market[dayton_market['property_type'] == 'Multi-Family (2-4 Unit)'].sort_values('period_begin')
dayton_residential = dayton_market[dayton_market['property_type'] == 'All Residential'].sort_values('period_begin')
metrics = [ 'median_sale_price',
'inventory',
'median_list_price',
'homes_sold',
'avg_sale_to_list',
'sold_above_list',
'homes_sold',
'median_list_ppsf',
'pending_sales',
'off_market_in_two_weeks',
'price_drops',
]
fig, ax = plt.subplots(11,1,sharex = True,figsize = (20,30))
fig.tight_layout(pad = 5)
ax = ax.flatten()
for ind,i in enumerate(metrics):
ax[ind].plot(dayton_townhouse['period_begin'],dayton_townhouse[i])
ax[ind].set_title(i)
ax[ind].set_xticklabels(dayton_townhouse['period_begin'],rotation = 90)
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\3672131972.py:25: UserWarning: FixedFormatter should only be used together with FixedLocator ax[ind].set_xticklabels(dayton_townhouse['period_begin'],rotation = 90)
metrics = [ 'median_sale_price',
'inventory',
'median_list_price',
'homes_sold',
'avg_sale_to_list',
'sold_above_list',
'homes_sold',
'median_list_ppsf',
'pending_sales',
'off_market_in_two_weeks',
'price_drops',
]
fig, ax = plt.subplots(11,1,sharex = True,figsize = (20,30))
fig.tight_layout(pad = 5)
ax = ax.flatten()
for ind,i in enumerate(metrics):
ax[ind].plot(dayton_family_home['period_begin'],dayton_family_home[i])
ax[ind].set_title(i)
ax[ind].set_xticklabels(dayton_family_home['period_begin'],rotation = 90)
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\2519425690.py:25: UserWarning: FixedFormatter should only be used together with FixedLocator ax[ind].set_xticklabels(dayton_family_home['period_begin'],rotation = 90)
metrics = [ 'median_sale_price',
'inventory',
'median_list_price',
'homes_sold',
'avg_sale_to_list',
'sold_above_list',
'homes_sold',
'median_list_ppsf',
'pending_sales',
'off_market_in_two_weeks',
'price_drops',
]
fig, ax = plt.subplots(11,1,sharex = True,figsize = (20,30))
fig.tight_layout(pad = 5)
ax = ax.flatten()
for ind,i in enumerate(metrics):
ax[ind].plot(dayton_condo['period_begin'],dayton_condo[i])
ax[ind].set_title(i)
ax[ind].set_xticklabels(dayton_condo['period_begin'],rotation = 90)
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\1762790433.py:25: UserWarning: FixedFormatter should only be used together with FixedLocator ax[ind].set_xticklabels(dayton_condo['period_begin'],rotation = 90)
metrics = [ 'median_sale_price',
'inventory',
'median_list_price',
'homes_sold',
'avg_sale_to_list',
'sold_above_list',
'homes_sold',
'median_list_ppsf',
'pending_sales',
'off_market_in_two_weeks',
'price_drops',
]
fig, ax = plt.subplots(11,1,sharex = True,figsize = (20,30))
fig.tight_layout(pad = 5)
ax = ax.flatten()
for ind,i in enumerate(metrics):
ax[ind].plot(dayton_multi_family['period_begin'],dayton_multi_family[i])
ax[ind].set_title(i)
ax[ind].set_xticklabels(dayton_multi_family['period_begin'],rotation = 90)
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\2919694956.py:25: UserWarning: FixedFormatter should only be used together with FixedLocator ax[ind].set_xticklabels(dayton_multi_family['period_begin'],rotation = 90)
metrics = [ 'median_sale_price',
'inventory',
'median_list_price',
'homes_sold',
'avg_sale_to_list',
'sold_above_list',
'homes_sold',
'median_list_ppsf',
'pending_sales',
'off_market_in_two_weeks',
'price_drops',
]
fig, ax = plt.subplots(11,1,sharex = True,figsize = (20,30))
fig.tight_layout(pad = 5)
ax = ax.flatten()
for ind,i in enumerate(metrics):
ax[ind].plot(dayton_residential['period_begin'],dayton_residential[i])
ax[ind].set_title(i)
ax[ind].set_xticklabels(dayton_residential['period_begin'],rotation = 90)
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\3061486996.py:25: UserWarning: FixedFormatter should only be used together with FixedLocator ax[ind].set_xticklabels(dayton_residential['period_begin'],rotation = 90)
'median_sale_price_mom',
'median_sale_price_yoy',
'median_list_price_yoy',
'inventory_mom',
'inventory_yoy',
'homes_sold_mom',
'homes_sold_yoy',
'avg_sale_to_list',
'sold_above_list',
'sold_above_list_mom',
'sold_above_list_yoy',
'off_market_in_two_weeks',
'price_drops',
'pending_sales',
'pending_sales_mom',
'pending_sales_yoy'
'median_list_ppsf_yoy'
'median_list_ppsf_mom'
metrics_mom = [ 'median_sale_price_mom',
'inventory_mom',
'homes_sold_mom',
'sold_above_list_mom',
'median_list_ppsf_mom',
'pending_sales_mom'
]
fig, ax = plt.subplots(6,1,sharex = True,figsize = (20,30))
fig.tight_layout(pad = 5)
ax = ax.flatten()
for ind,i in enumerate(metrics_mom):
ax[ind].plot(dayton_condo['period_begin'],dayton_condo[i])
ax[ind].set_title(i)
ax[ind].set_xticklabels(dayton_condo['period_begin'],rotation = 90)
C:\Users\wilke\AppData\Local\Temp\ipykernel_42968\1222752665.py:20: UserWarning: FixedFormatter should only be used together with FixedLocator ax[ind].set_xticklabels(dayton_condo['period_begin'],rotation = 90)
metrics_mom = [ 'median_sale_price_mom',
'inventory_mom',
'homes_sold_mom',
'sold_above_list_mom',
'median_list_ppsf_mom',
'pending_sales_mom'
]
fig, ax = plt.subplots(6,1,sharex = True,figsize = (20,30))
fig.tight_layout(pad = 5)
ax = ax.flatten()
for ind,i in enumerate(metrics_mom):
ax[ind].plot(dayton_condo['period_begin'],dayton_condo[i])
ax[ind].set_title(i)
ax[ind].set_xticklabels(dayton_condo['period_begin'],rotation = 90)
dayton_townhouse.columns
Index(['period_begin', 'property_type', 'parent_metro_region',
'median_sale_price', 'median_sale_price_mom', 'median_sale_price_yoy',
'median_list_price', 'median_list_price_yoy', 'homes_sold',
'homes_sold_mom', 'homes_sold_yoy', 'median_list_ppsf',
'median_list_ppsf_mom', 'median_list_ppsf_yoy', 'inventory',
'inventory_mom', 'inventory_yoy', 'homes_sold', 'homes_sold_mom',
'homes_sold_yoy', 'avg_sale_to_list', 'sold_above_list',
'sold_above_list_mom', 'sold_above_list_yoy', 'off_market_in_two_weeks',
'price_drops', 'pending_sales', 'pending_sales_mom',
'pending_sales_yoy'],
dtype='object')